suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(source("CCLasso.R"))

set.seed(19825791)

# coefficient of variation
co.var <- function(x, na.rm = TRUE) {
  100 * (sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm))
}

# kjhealy/polar-labels.r on github:
# https://gist.github.com/kjhealy/834774#file-polar-labels-r
radian.rescale <- function(x,
                           start = 0,
                           direction = 1) {
  c.rotate <- function(x) (x + start) %% (2 * pi) * direction
  c.rotate(scales::rescale(x, c(0, 2 * pi), range(x)))
}

merge.easy <- function(df1, df2, key) {
  df1 <- data.table(df1, key = key)
  df2 <- data.table(df2, key = key)
  return(as.data.frame(unique(
    merge(
      df1,
      df2,
      all.x = TRUE,
      by = .EACHI,
      allow.cartesian = TRUE
    )
  ), stringsAsFactors = FALSE))
}

# quartile coefficient of dispersion
qcd <- function(x) {
  (quantile(x, 0.75) - quantile(x, 0.25)) /
  (quantile(x, 0.75) + quantile(x, 0.25))
}

plot_network <- function(ig, coord = layout_with_fr(ig), ...) {
  plot(
    ig,
    layout = coord,
    vertex.size = 2,
    vertex.label = V(ig)$name,
    vertex.label.dist = 1,
    vertex.label.cex = 0.25,
    vertex.label.degree = radian.rescale(
      x = 1:vcount(ig),
      direction = -1,
      start = 0
    ),
    edge.color = ifelse(E(ig)$weight > 0, 'green', 'red'),
    ...
  )
}

Load Data and Metadata

Variance and Coefficient of Variation Summary Statistics

##
## Call:
## lm(formula = log(apply(pfam_data, 2, var)) ~ log(apply(pfam_data,
##     2, mean, na.rm = TRUE)))
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -2.6631 -0.4140  0.0129  0.3877  3.6636
##
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  3.151887   0.016713   188.6
## log(apply(pfam_data, 2, mean, na.rm = TRUE)) 1.095131   0.004401   248.8
##                                              Pr(>|t|)
## (Intercept)                                    <2e-16 ***
## log(apply(pfam_data, 2, mean, na.rm = TRUE))   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7279 on 6091 degrees of freedom
## Multiple R-squared:  0.9104, Adjusted R-squared:  0.9104
## F-statistic: 6.191e+04 on 1 and 6091 DF,  p-value: < 2.2e-16
##
## Call:
## lm(formula = log(apply(pfam_data, 2, co.var)) ~ log(apply(pfam_data,
##     2, mean, na.rm = TRUE)))
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.33154 -0.20699  0.00644  0.19387  1.83182
##
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                   6.181113   0.008357   739.7
## log(apply(pfam_data, 2, mean, na.rm = TRUE)) -0.452435   0.002201  -205.6
##                                              Pr(>|t|)
## (Intercept)                                    <2e-16 ***
## log(apply(pfam_data, 2, mean, na.rm = TRUE))   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3639 on 6091 degrees of freedom
## Multiple R-squared:  0.874,  Adjusted R-squared:  0.874
## F-statistic: 4.227e+04 on 1 and 6091 DF,  p-value: < 2.2e-16

Build Network

# CCLasso takes many hours to run (load cached if available)
ccl_pfam_file <- "data/ccl_pfam_cv200.Rda"
if (!file.exists(ccl_pfam_file)) {
  ccl_pfam <- cclasso(as.matrix(pfam_data), counts = TRUE)
  save(ccl_pfam, file = ccl_pfam_file)
} else {
  load(ccl_pfam_file)
}
ccl_pfam$cor_w[is.nan(ccl_pfam$cor_w)] <- 0
ccl_pfam_pvals_vec <- ccl_pfam$p_vals[upper.tri(ccl_pfam$p_vals)]
ccl_pfam_pvals_adj <- p.adjust(ccl_pfam_pvals_vec, "BH")
ccl_pfam_edges <- ccl_pfam$cor_w[upper.tri(ccl_pfam$cor_w)]
# p-value < 1e-4 otherwise CCLasso network number of edges is very large
ccl_pfam_edges[ccl_pfam_pvals_adj > 1e-4] <- 0
ccl_pfam_amat <- matrix(0, dim(pfam_data)[2], dim(pfam_data)[2])
rownames(ccl_pfam_amat) <- colnames(pfam_data)
colnames(ccl_pfam_amat) <- colnames(pfam_data)
ccl_pfam_amat[upper.tri(ccl_pfam_amat)] <- ccl_pfam_edges
ccl_pfam_g <- graph_from_adjacency_matrix(
  ccl_pfam_amat,
  mode = "upper",
  weighted = TRUE,
  diag = FALSE
)
ccl_pfam_g <- induced_subgraph(
  ccl_pfam_g,
  V(ccl_pfam_g)[
    components(ccl_pfam_g)$membership ==
    which.max(components(ccl_pfam_g)$csize)
  ]
)
ccl_pfam_p_g <- delete_edges(
  ccl_pfam_g,
  E(ccl_pfam_g)[E(ccl_pfam_g)$weight < 0]
)
ccl_pfam_p_g <- induced_subgraph(
  ccl_pfam_p_g,
  V(ccl_pfam_p_g)[
    components(ccl_pfam_p_g)$membership ==
    which.max(components(ccl_pfam_p_g)$csize)
  ]
)
ccl_pfam_p_g_v_msk <- (
  V(ccl_pfam_g)$name %in% V(ccl_pfam_p_g)$name
)
ccl_pfam_n_g <- delete_edges(
  ccl_pfam_g,
  E(ccl_pfam_g)[E(ccl_pfam_g)$weight > 0]
)
ccl_pfam_n_g <- induced_subgraph(
  ccl_pfam_n_g,
  V(ccl_pfam_n_g)[
    components(ccl_pfam_n_g)$membership ==
    which.max(components(ccl_pfam_n_g)$csize)
  ]
)
ccl_pfam_n_g_v_msk <- (
  V(ccl_pfam_g)$name %in% V(ccl_pfam_n_g)$name
)

Visualize Network

Network Statistics

Community Detection in Networks Using InfoMap and SpinGlass

num_top_clusts <- 3

# InfoMap +Network
ccl_pfam_p_g_imap <- cluster_infomap(ccl_pfam_p_g)
ccl_pfam_p_g_imap_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_p_g_imap), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_p_g_imap_top_v_msk <- (
  ccl_pfam_p_g_imap$membership %in% ccl_pfam_p_g_imap_top_ids
)
ccl_pfam_p_g_imap_top_g <- induced_subgraph(
  ccl_pfam_p_g,
  V(ccl_pfam_p_g)[ccl_pfam_p_g_imap_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_p_g_imap_top <- ccl_pfam_p_g_imap
ccl_pfam_p_g_imap_top$names <-
  ccl_pfam_p_g_imap_top$names[ccl_pfam_p_g_imap_top_v_msk]
ccl_pfam_p_g_imap_top$membership <-
  ccl_pfam_p_g_imap_top$membership[ccl_pfam_p_g_imap_top_v_msk]
ccl_pfam_p_g_imap_top$vcount <- length(ccl_pfam_p_g_imap_top$names)

# SpinGlass +Network
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_p_g_spin_file <- "data/ccl_pfam_p_g_spin.Rda"
if (!file.exists(ccl_pfam_p_g_spin_file)) {
  ccl_pfam_p_g_spin <- cluster_spinglass(ccl_pfam_p_g)
  save(ccl_pfam_p_g_spin, file = ccl_pfam_p_g_spin_file)
} else {
  load(ccl_pfam_p_g_spin_file)
}
ccl_pfam_p_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_p_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_p_g_spin_top_v_msk <- (
  ccl_pfam_p_g_spin$membership %in% ccl_pfam_p_g_spin_top_ids
)
ccl_pfam_p_g_spin_top_g <- induced_subgraph(
  ccl_pfam_p_g,
  V(ccl_pfam_p_g)[ccl_pfam_p_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_p_g_spin_top <- ccl_pfam_p_g_spin
ccl_pfam_p_g_spin_top$names <-
  ccl_pfam_p_g_spin_top$names[ccl_pfam_p_g_spin_top_v_msk]
ccl_pfam_p_g_spin_top$membership <-
  ccl_pfam_p_g_spin_top$membership[ccl_pfam_p_g_spin_top_v_msk]
ccl_pfam_p_g_spin_top$vcount <- length(ccl_pfam_p_g_spin_top$names)

# SpinGlass +/-Network
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_g_spin_file <- "data/ccl_pfam_g_spin.Rda"
if (!file.exists(ccl_pfam_g_spin_file)) {
  ccl_pfam_g_spin <- cluster_spinglass(ccl_pfam_g,
                                       implementation = "neg")
  save(ccl_pfam_g_spin, file = ccl_pfam_g_spin_file)
} else {
  load(ccl_pfam_g_spin_file)
}
ccl_pfam_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_g_spin_top_v_msk <- (
  ccl_pfam_g_spin$membership %in% ccl_pfam_g_spin_top_ids
)
ccl_pfam_g_spin_top_g <- induced_subgraph(
  ccl_pfam_g,
  V(ccl_pfam_g)[ccl_pfam_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_g_spin_top <- ccl_pfam_g_spin
ccl_pfam_g_spin_top$names <-
  ccl_pfam_g_spin_top$names[ccl_pfam_g_spin_top_v_msk]
ccl_pfam_g_spin_top$membership <-
  ccl_pfam_g_spin_top$membership[ccl_pfam_g_spin_top_v_msk]
ccl_pfam_g_spin_top$vcount <- length(ccl_pfam_g_spin_top$names)

par(mfrow = c(3, 2), mar = c(1, 1, 3, 1))
plot(
  ccl_pfam_p_g_imap,
  ccl_pfam_p_g,
  layout = ccl_pfam_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM CCLasso +Network InfoMap Clusters"
)
plot(
  ccl_pfam_p_g_imap_top,
  ccl_pfam_p_g_imap_top_g,
  col = membership(ccl_pfam_p_g_imap)[ccl_pfam_p_g_imap_top_v_msk],
  layout = ccl_pfam_p_g_coord[ccl_pfam_p_g_imap_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(ccl_pfam_p_g_imap)),
                        alpha = 1)[ccl_pfam_p_g_imap_top_ids],
  mark.col = rainbow(length(communities(ccl_pfam_p_g_imap)),
                     alpha = 0.3)[ccl_pfam_p_g_imap_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_p_g_spin,
  ccl_pfam_p_g,
  layout = ccl_pfam_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM CCLasso +Network SpinGlass Clusters"
)
plot(
  ccl_pfam_p_g_spin_top,
  ccl_pfam_p_g_spin_top_g,
  col = membership(ccl_pfam_p_g_spin)[ccl_pfam_p_g_spin_top_v_msk],
  layout = ccl_pfam_p_g_coord[ccl_pfam_p_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(ccl_pfam_p_g_spin)),
                        alpha = 1)[ccl_pfam_p_g_spin_top_ids],
  mark.col = rainbow(length(communities(ccl_pfam_p_g_spin)),
                     alpha = 0.3)[ccl_pfam_p_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_g_spin,
  ccl_pfam_g,
  layout = ccl_pfam_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM CCLasso +/-Network SpinGlass Clusters"
)
plot(
  ccl_pfam_g_spin_top,
  ccl_pfam_g_spin_top_g,
  col = membership(ccl_pfam_g_spin)[ccl_pfam_g_spin_top_v_msk],
  layout = ccl_pfam_g_coord[ccl_pfam_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(ccl_pfam_g_spin)),
                        alpha = 1)[ccl_pfam_g_spin_top_ids],
  mark.col = rainbow(length(communities(ccl_pfam_g_spin)),
                     alpha = 0.3)[ccl_pfam_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)

Build Network Including Patient Response

# Add patient response to dataset
pfam_resp_data <- pfam_data
pfam_resp_data$Sample <- rownames(pfam_resp_data)
pfam_resp_data <- merge.easy(pfam_resp_data, patient_meta, key = "Sample")
rownames(pfam_resp_data) <- pfam_resp_data$Sample
pfam_resp_data$Sample <- NULL

# CCLasso takes many hours to run (load cached if available)
ccl_pfam_resp_file <- "data/ccl_pfam_resp_cv200.Rda"
if (!file.exists(ccl_pfam_resp_file)) {
  ccl_pfam_resp <- cclasso(as.matrix(pfam_resp_data), counts = TRUE)
  save(ccl_pfam_resp, file = ccl_pfam_resp_file)
} else {
  load(ccl_pfam_resp_file)
}
ccl_pfam_resp$cor_w[is.nan(ccl_pfam_resp$cor_w)] <- 0
ccl_pfam_resp_pvals_vec <- ccl_pfam_resp$p_vals[upper.tri(ccl_pfam_resp$p_vals)]
ccl_pfam_resp_pvals_adj <- p.adjust(ccl_pfam_resp_pvals_vec, "BH")
ccl_pfam_resp_edges <- ccl_pfam_resp$cor_w[upper.tri(ccl_pfam_resp$cor_w)]
# p-value < 1e-4 otherwise CCLasso network number of edges is very large
ccl_pfam_resp_edges[ccl_pfam_resp_pvals_adj > 1e-4] <- 0
ccl_pfam_resp_amat <- matrix(0, dim(pfam_resp_data)[2], dim(pfam_resp_data)[2])
rownames(ccl_pfam_resp_amat) <- colnames(pfam_resp_data)
colnames(ccl_pfam_resp_amat) <- colnames(pfam_resp_data)
ccl_pfam_resp_amat[upper.tri(ccl_pfam_resp_amat)] <- ccl_pfam_resp_edges
ccl_pfam_resp_g <- graph_from_adjacency_matrix(
  ccl_pfam_resp_amat,
  mode = "upper",
  weighted = TRUE,
  diag = FALSE
)
ccl_pfam_resp_g <- induced_subgraph(
  ccl_pfam_resp_g,
  V(ccl_pfam_resp_g)[
    components(ccl_pfam_resp_g)$membership ==
    which.max(components(ccl_pfam_resp_g)$csize)
  ]
)
ccl_pfam_resp_p_g <- delete_edges(
  ccl_pfam_resp_g,
  E(ccl_pfam_resp_g)[E(ccl_pfam_resp_g)$weight < 0]
)
ccl_pfam_resp_p_g <- induced_subgraph(
  ccl_pfam_resp_p_g,
  V(ccl_pfam_resp_p_g)[
    components(ccl_pfam_resp_p_g)$membership ==
    which.max(components(ccl_pfam_resp_p_g)$csize)
  ]
)
ccl_pfam_resp_p_g_v_msk <- (
  V(ccl_pfam_resp_g)$name %in% V(ccl_pfam_resp_p_g)$name
)
ccl_pfam_resp_n_g <- delete_edges(
  ccl_pfam_resp_g,
  E(ccl_pfam_resp_g)[E(ccl_pfam_resp_g)$weight > 0]
)
ccl_pfam_resp_n_g <- induced_subgraph(
  ccl_pfam_resp_n_g,
  V(ccl_pfam_resp_n_g)[
    components(ccl_pfam_resp_n_g)$membership ==
    which.max(components(ccl_pfam_resp_n_g)$csize)
  ]
)
ccl_pfam_resp_n_g_v_msk <- (
  V(ccl_pfam_resp_g)$name %in% V(ccl_pfam_resp_n_g)$name
)
ccl_pfam_resp_g_coord <- layout_with_lgl(ccl_pfam_resp_g)
ccl_pfam_resp_p_g_coord <-
  ccl_pfam_resp_g_coord[ccl_pfam_resp_p_g_v_msk, , drop = F]
ccl_pfam_resp_n_g_coord <-
  ccl_pfam_resp_g_coord[ccl_pfam_resp_n_g_v_msk, , drop = F]

Where does response cluster?

ccl_pfam_resp_p_g_imap <- cluster_infomap(ccl_pfam_resp_p_g)
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_resp_p_g_spin_file <- "data/ccl_pfam_resp_p_g_spin.Rda"
if (!file.exists(ccl_pfam_resp_p_g_spin_file)) {
  ccl_pfam_resp_p_g_spin <- cluster_spinglass(ccl_pfam_resp_p_g)
  save(ccl_pfam_resp_p_g_spin, file = ccl_pfam_resp_p_g_spin_file)
} else {
  load(ccl_pfam_resp_p_g_spin_file)
}
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_resp_g_spin_file <- "data/ccl_pfam_resp_g_spin.Rda"
if (!file.exists(ccl_pfam_resp_g_spin_file)) {
  ccl_pfam_resp_g_spin <- cluster_spinglass(ccl_pfam_resp_g,
                                            implementation = "neg")
  save(ccl_pfam_resp_g_spin, file = ccl_pfam_resp_g_spin_file)
} else {
  load(ccl_pfam_resp_g_spin_file)
}
ccl_pfam_resp_p_g_imap_data <- data.frame()
for (i in 1:max(ccl_pfam_resp_p_g_imap$membership)) {
  ccl_pfam_resp_p_g_imap_data <- rbind(
    ccl_pfam_resp_p_g_imap_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_resp_p_g)$name[
        ccl_pfam_resp_p_g_imap$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_resp_p_g_imap_data <-
  merge.easy(ccl_pfam_resp_p_g_imap_data, pfam_feat, key = "Accession")
ccl_pfam_resp_p_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_resp_p_g_spin$membership)) {
  ccl_pfam_resp_p_g_spin_data <- rbind(
    ccl_pfam_resp_p_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_resp_p_g)$name[
        ccl_pfam_resp_p_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_resp_p_g_spin_data <-
  merge.easy(ccl_pfam_resp_p_g_spin_data, pfam_feat, key = "Accession")
ccl_pfam_resp_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_resp_g_spin$membership)) {
  ccl_pfam_resp_g_spin_data <- rbind(
    ccl_pfam_resp_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_resp_g)$name[
        ccl_pfam_resp_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_resp_g_spin_data <-
  merge.easy(ccl_pfam_resp_g_spin_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_resp_p_g_imap_data,
          file = "results/PfamRes_CCLassoPosNet_InfoMapClusters.csv",
          row.names = FALSE)
write.csv(ccl_pfam_resp_p_g_spin_data,
          file = "results/PfamRes_CCLassoPosNet_SpinGlassClusters.csv",
          row.names = FALSE)
write.csv(ccl_pfam_resp_g_spin_data,
          file = "results/PfamRes_CCLassoNet_SpinGlassClusters.csv",
          row.names = FALSE)
cat(
  "InfoMap +Network response cluster:",
  ccl_pfam_resp_p_g_imap_data$Cluster[
    ccl_pfam_resp_p_g_imap_data$Accession == "ResponseBinary"
  ],
  "\nSpinGlass +Network response cluster:",
  ccl_pfam_resp_p_g_spin_data$Cluster[
    ccl_pfam_resp_p_g_spin_data$Accession == "ResponseBinary"
  ],
  "\nSpinGlass +/-Network response cluster:",
  ccl_pfam_resp_g_spin_data$Cluster[
    ccl_pfam_resp_g_spin_data$Accession == "ResponseBinary"
  ],
  "\n"
)
## InfoMap +Network response cluster: 2
## SpinGlass +Network response cluster: 4
## SpinGlass +/-Network response cluster: 1

Bipartite Graphs of InfoMap and SpinGlass Clusters Effect on Response

ccl_pfam_p_g_imap_clusts <- data.frame(
  Node = V(ccl_pfam_p_g)$name,
  Cluster = ccl_pfam_p_g_imap$membership,
  stringsAsFactors = FALSE
)
ccl_pfam_p_g_spin_clusts <- data.frame(
  Node = V(ccl_pfam_p_g)$name,
  Cluster = ccl_pfam_p_g_spin$membership,
  stringsAsFactors = FALSE
)
ccl_pfam_g_spin_clusts <- data.frame(
  Node = V(ccl_pfam_g)$name,
  Cluster = ccl_pfam_g_spin$membership,
  stringsAsFactors = FALSE
)

ccl_pfam_resp_g_edges <- cbind(as_edgelist(ccl_pfam_resp_g),
                               E(ccl_pfam_resp_g)$weight)
ccl_pfam_resp_g_resp_edges <-
  as.data.frame(ccl_pfam_resp_g_edges[c(
    ccl_pfam_resp_g_edges[, 1] == "ResponseBinary" |
      ccl_pfam_resp_g_edges[, 2] == "ResponseBinary"
  ), ], stringsAsFactors = FALSE)
names(ccl_pfam_resp_g_resp_edges) <- c("Node", "Site", "Weight")

ccl_pfam_resp_p_g_imap_clusts_resp_edges <-
  merge.easy(ccl_pfam_resp_g_resp_edges,
             ccl_pfam_p_g_imap_clusts,
             key = "Node")
ccl_pfam_resp_p_g_imap_clusts_resp_edges$Weight <-
  as.numeric(ccl_pfam_resp_p_g_imap_clusts_resp_edges$Weight)
ccl_pfam_resp_p_g_spin_clusts_resp_edges <-
  merge.easy(ccl_pfam_resp_g_resp_edges,
             ccl_pfam_p_g_spin_clusts,
             key = "Node")
ccl_pfam_resp_p_g_spin_clusts_resp_edges$Weight <-
  as.numeric(ccl_pfam_resp_p_g_spin_clusts_resp_edges$Weight)
ccl_pfam_resp_g_spin_clusts_resp_edges <-
  merge.easy(ccl_pfam_resp_g_resp_edges,
             ccl_pfam_g_spin_clusts,
             key = "Node")
ccl_pfam_resp_g_spin_clusts_resp_edges$Weight <-
  as.numeric(ccl_pfam_resp_g_spin_clusts_resp_edges$Weight)

ccl_pfam_resp_p_g_imap_clusts_resp_clusts <-
  ccl_pfam_resp_p_g_imap_clusts_resp_edges %>%
  group_by(Site, Cluster) %>%
  summarise(Weight = mean(Weight)) %>%
  subset(!is.na(Cluster))
ccl_pfam_resp_p_g_spin_clusts_resp_clusts <-
  ccl_pfam_resp_p_g_spin_clusts_resp_edges %>%
  group_by(Site, Cluster) %>%
  summarise(Weight = mean(Weight)) %>%
  subset(!is.na(Cluster))
ccl_pfam_resp_g_spin_clusts_resp_clusts <-
  ccl_pfam_resp_g_spin_clusts_resp_edges %>%
  group_by(Site, Cluster) %>%
  summarise(Weight = mean(Weight)) %>%
  subset(!is.na(Cluster))

ccl_pfam_resp_p_g_imap_bipart_g <-
  graph.data.frame(ccl_pfam_resp_p_g_imap_clusts_resp_clusts[, 1:2],
                   directed = FALSE)
V(ccl_pfam_resp_p_g_imap_bipart_g)$type <-
  V(ccl_pfam_resp_p_g_imap_bipart_g)$name %in%
  as.character(unlist(ccl_pfam_resp_p_g_imap_clusts_resp_clusts[, 1]))
E(ccl_pfam_resp_p_g_imap_bipart_g)$weight <-
  ccl_pfam_resp_p_g_imap_clusts_resp_clusts$Weight
V(ccl_pfam_resp_p_g_imap_bipart_g)$color <-
  V(ccl_pfam_resp_p_g_imap_bipart_g)$type
V(ccl_pfam_resp_p_g_imap_bipart_g)$color <-
  gsub("FALSE", rgb(0, 1, 1, 0.2),
       V(ccl_pfam_resp_p_g_imap_bipart_g)$color)
V(ccl_pfam_resp_p_g_imap_bipart_g)$color <-
  gsub("TRUE", rgb(1, 1, 0, 0.2),
       V(ccl_pfam_resp_p_g_imap_bipart_g)$color)

ccl_pfam_resp_p_g_spin_bipart_g <-
  graph.data.frame(ccl_pfam_resp_p_g_spin_clusts_resp_clusts[, 1:2],
                   directed = FALSE)
V(ccl_pfam_resp_p_g_spin_bipart_g)$type <-
  V(ccl_pfam_resp_p_g_spin_bipart_g)$name %in%
  as.character(unlist(ccl_pfam_resp_p_g_spin_clusts_resp_clusts[, 1]))
E(ccl_pfam_resp_p_g_spin_bipart_g)$weight <-
  ccl_pfam_resp_p_g_spin_clusts_resp_clusts$Weight
V(ccl_pfam_resp_p_g_spin_bipart_g)$color <-
  V(ccl_pfam_resp_p_g_spin_bipart_g)$type
V(ccl_pfam_resp_p_g_spin_bipart_g)$color <-
  gsub("FALSE", rgb(0, 1, 1, 0.2),
       V(ccl_pfam_resp_p_g_spin_bipart_g)$color)
V(ccl_pfam_resp_p_g_spin_bipart_g)$color <-
  gsub("TRUE", rgb(1, 1, 0, 0.2),
       V(ccl_pfam_resp_p_g_spin_bipart_g)$color)

ccl_pfam_resp_g_spin_bipart_g <-
  graph.data.frame(ccl_pfam_resp_g_spin_clusts_resp_clusts[, 1:2],
                   directed = FALSE)
V(ccl_pfam_resp_g_spin_bipart_g)$type <-
  V(ccl_pfam_resp_g_spin_bipart_g)$name %in%
  as.character(unlist(ccl_pfam_resp_g_spin_clusts_resp_clusts[, 1]))
E(ccl_pfam_resp_g_spin_bipart_g)$weight <-
  ccl_pfam_resp_g_spin_clusts_resp_clusts$Weight
V(ccl_pfam_resp_g_spin_bipart_g)$color <-
  V(ccl_pfam_resp_g_spin_bipart_g)$type
V(ccl_pfam_resp_g_spin_bipart_g)$color <-
  gsub("FALSE", rgb(0, 1, 1, 0.2),
       V(ccl_pfam_resp_g_spin_bipart_g)$color)
V(ccl_pfam_resp_g_spin_bipart_g)$color <-
  gsub("TRUE", rgb(1, 1, 0, 0.2),
       V(ccl_pfam_resp_g_spin_bipart_g)$color)

par(mfrow = c(1, 3), mar = c(1, 1, 3, 1))
plot(
  ccl_pfam_resp_p_g_imap_bipart_g,
  edge.color = ifelse(
    E(ccl_pfam_resp_p_g_imap_bipart_g)$weight > 0,
    rgb(0, 1, 0),
    rgb(1, 0, 0)
  ),
  edge.width = abs(E(ccl_pfam_resp_p_g_imap_bipart_g)$weight) * 30,
  layout = layout_as_bipartite,
  vertex.size = 15,
  vertex.label.cex = 1,
  vertex.label.color = "black",
  main = "PFAM Response CCLasso +Network\nInfoMap Cluster Effect on Response"
)
plot(
  ccl_pfam_resp_p_g_spin_bipart_g,
  edge.color = ifelse(
    E(ccl_pfam_resp_p_g_spin_bipart_g)$weight > 0,
    rgb(0, 1, 0),
    rgb(1, 0, 0)
  ),
  edge.width = abs(E(ccl_pfam_resp_p_g_spin_bipart_g)$weight) * 30,
  layout = layout_as_bipartite,
  vertex.size = 15,
  vertex.label.cex = 1,
  vertex.label.color = "black",
  main = "PFAM Response CCLasso +Network\nSpinGlass Cluster Effect on Response"
)
plot(
  ccl_pfam_resp_g_spin_bipart_g,
  edge.color = ifelse(
    E(ccl_pfam_resp_g_spin_bipart_g)$weight > 0,
    rgb(0, 1, 0),
    rgb(1, 0, 0)
  ),
  edge.width = abs(E(ccl_pfam_resp_g_spin_bipart_g)$weight) * 30,
  layout = layout_as_bipartite,
  vertex.size = 15,
  vertex.label.cex = 1,
  vertex.label.color = "black",
  main = "PFAM Response CCLasso +/-Network\nSpinGlass Cluster Effect on Response"
)

Save cluster bipartite results:

pfam_feat_2 <- pfam_feat
names(pfam_feat_2)[1] <- "Node"

ccl_pfam_p_g_imap_clusts <-
  merge.easy(ccl_pfam_p_g_imap_clusts, pfam_feat_2, key = "Node")
ccl_pfam_resp_p_g_imap_clusts_resp_clust_weight <-
  ccl_pfam_resp_p_g_imap_clusts_resp_clusts[, c("Cluster", "Weight")]
names(ccl_pfam_resp_p_g_imap_clusts_resp_clust_weight)[2] <- "ResponseEffect"
ccl_pfam_p_g_imap_clusts <-
  merge.easy(
    ccl_pfam_p_g_imap_clusts,
    ccl_pfam_resp_p_g_imap_clusts_resp_clust_weight,
    key = "Cluster"
  )
write.csv(
  ccl_pfam_p_g_imap_clusts,
  file = "results/Pfam_CCLassoPosNet_InfoMapClusters_ResponseEffect.csv",
  row.names = FALSE
)

ccl_pfam_p_g_spin_clusts <-
  merge.easy(ccl_pfam_p_g_spin_clusts, pfam_feat_2, key = "Node")
ccl_pfam_resp_p_g_spin_clusts_resp_clust_weight <-
  ccl_pfam_resp_p_g_spin_clusts_resp_clusts[, c("Cluster", "Weight")]
names(ccl_pfam_resp_p_g_spin_clusts_resp_clust_weight)[2] <- "ResponseEffect"
ccl_pfam_p_g_spin_clusts <-
  merge.easy(
    ccl_pfam_p_g_spin_clusts,
    ccl_pfam_resp_p_g_spin_clusts_resp_clust_weight,
    key = "Cluster"
  )
write.csv(
  ccl_pfam_p_g_spin_clusts,
  file = "results/Pfam_CCLassoPosNet_SpinGlassClusters_ResponseEffect.csv",
  row.names = FALSE
)

ccl_pfam_g_spin_clusts <-
  merge.easy(ccl_pfam_g_spin_clusts, pfam_feat_2, key = "Node")
ccl_pfam_resp_g_spin_clusts_resp_clust_weight <-
  ccl_pfam_resp_g_spin_clusts_resp_clusts[, c("Cluster", "Weight")]
names(ccl_pfam_resp_g_spin_clusts_resp_clust_weight)[2] <- "ResponseEffect"
ccl_pfam_g_spin_clusts <-
  merge.easy(
    ccl_pfam_g_spin_clusts,
    ccl_pfam_resp_g_spin_clusts_resp_clust_weight,
    key = "Cluster"
  )
write.csv(
  ccl_pfam_g_spin_clusts,
  file = "results/Pfam_CCLassoNet_SpinGlassClusters_ResponseEffect.csv",
  row.names = FALSE
)

Build Responder and Non-Responder Networks

# Responder
pfam_rspdr_data <-
  pfam_data[patient_meta$Sample[patient_meta$ResponseBinary == 1], ]

# CCLasso takes many hours to run (load cached if available)
ccl_pfam_rspdr_file <- "data/ccl_pfam_rspdr_cv200.Rda"
if (!file.exists(ccl_pfam_rspdr_file)) {
  ccl_pfam_rspdr <- cclasso(as.matrix(pfam_rspdr_data), counts = TRUE)
  save(ccl_pfam_rspdr, file = ccl_pfam_rspdr_file)
} else {
  load(ccl_pfam_rspdr_file)
}
ccl_pfam_rspdr$cor_w[is.nan(ccl_pfam_rspdr$cor_w)] <- 0
ccl_pfam_rspdr_pvals_vec <-
  ccl_pfam_rspdr$p_vals[upper.tri(ccl_pfam_rspdr$p_vals)]
ccl_pfam_rspdr_pvals_adj <-
  p.adjust(ccl_pfam_rspdr_pvals_vec, "BH")
ccl_pfam_rspdr_edges <-
  ccl_pfam_rspdr$cor_w[upper.tri(ccl_pfam_rspdr$cor_w)]
# p-value < 1e-4 otherwise CCLasso network number of edges is very large
ccl_pfam_rspdr_edges[ccl_pfam_rspdr_pvals_adj > 1e-4] <- 0
ccl_pfam_rspdr_amat <-
  matrix(0, dim(pfam_rspdr_data)[2], dim(pfam_rspdr_data)[2])
rownames(ccl_pfam_rspdr_amat) <- colnames(pfam_rspdr_data)
colnames(ccl_pfam_rspdr_amat) <- colnames(pfam_rspdr_data)
ccl_pfam_rspdr_amat[upper.tri(ccl_pfam_rspdr_amat)] <- ccl_pfam_rspdr_edges
ccl_pfam_rspdr_g <- graph_from_adjacency_matrix(
  ccl_pfam_rspdr_amat,
  mode = "upper",
  weighted = TRUE,
  diag = FALSE
)
ccl_pfam_rspdr_g <- induced_subgraph(
  ccl_pfam_rspdr_g,
  V(ccl_pfam_rspdr_g)[
    components(ccl_pfam_rspdr_g)$membership ==
    which.max(components(ccl_pfam_rspdr_g)$csize)
  ]
)
ccl_pfam_rspdr_g_v_msk <- (
  V(ccl_pfam_rspdr_g)$name %in% V(ccl_pfam_rspdr_g)$name
)
ccl_pfam_rspdr_p_g <- delete_edges(
  ccl_pfam_rspdr_g,
  E(ccl_pfam_rspdr_g)[E(ccl_pfam_rspdr_g)$weight < 0]
)
ccl_pfam_rspdr_p_g <- induced_subgraph(
  ccl_pfam_rspdr_p_g,
  V(ccl_pfam_rspdr_p_g)[
    components(ccl_pfam_rspdr_p_g)$membership ==
    which.max(components(ccl_pfam_rspdr_p_g)$csize)
  ]
)
ccl_pfam_rspdr_p_g_v_msk <- (
  V(ccl_pfam_rspdr_g)$name %in% V(ccl_pfam_rspdr_p_g)$name
)
ccl_pfam_rspdr_n_g <- delete_edges(
  ccl_pfam_rspdr_g,
  E(ccl_pfam_rspdr_g)[E(ccl_pfam_rspdr_g)$weight > 0]
)
ccl_pfam_rspdr_n_g <- induced_subgraph(
  ccl_pfam_rspdr_n_g,
  V(ccl_pfam_rspdr_n_g)[
    components(ccl_pfam_rspdr_n_g)$membership ==
    which.max(components(ccl_pfam_rspdr_n_g)$csize)
  ]
)
ccl_pfam_rspdr_n_g_v_msk <- (
  V(ccl_pfam_rspdr_g)$name %in% V(ccl_pfam_rspdr_n_g)$name
)

# Non-Responder
pfam_nspdr_data <-
  pfam_data[patient_meta$Sample[patient_meta$ResponseBinary == 0], ]

# CCLasso takes many hours to run (load cached if available)
ccl_pfam_nspdr_file <- "data/ccl_pfam_nspdr_cv200.Rda"
if (!file.exists(ccl_pfam_nspdr_file)) {
  ccl_pfam_nspdr <- cclasso(as.matrix(pfam_nspdr_data), counts = TRUE)
  save(ccl_pfam_nspdr, file = ccl_pfam_nspdr_file)
} else {
  load(ccl_pfam_nspdr_file)
}
ccl_pfam_nspdr$cor_w[is.nan(ccl_pfam_nspdr$cor_w)] <- 0
ccl_pfam_nspdr_pvals_vec <-
  ccl_pfam_nspdr$p_vals[upper.tri(ccl_pfam_nspdr$p_vals)]
ccl_pfam_nspdr_pvals_adj <-
  p.adjust(ccl_pfam_nspdr_pvals_vec, "BH")
ccl_pfam_nspdr_edges <-
  ccl_pfam_nspdr$cor_w[upper.tri(ccl_pfam_nspdr$cor_w)]
# p-value < 1e-4 otherwise CCLasso network number of edges is very large
ccl_pfam_nspdr_edges[ccl_pfam_nspdr_pvals_adj > 1e-4] <- 0
ccl_pfam_nspdr_amat <-
  matrix(0, dim(pfam_nspdr_data)[2], dim(pfam_nspdr_data)[2])
rownames(ccl_pfam_nspdr_amat) <- colnames(pfam_nspdr_data)
colnames(ccl_pfam_nspdr_amat) <- colnames(pfam_nspdr_data)
ccl_pfam_nspdr_amat[upper.tri(ccl_pfam_nspdr_amat)] <- ccl_pfam_nspdr_edges
ccl_pfam_nspdr_g <- graph_from_adjacency_matrix(
  ccl_pfam_nspdr_amat,
  mode = "upper",
  weighted = TRUE,
  diag = FALSE
)
ccl_pfam_nspdr_g <- induced_subgraph(
  ccl_pfam_nspdr_g,
  V(ccl_pfam_nspdr_g)[
    components(ccl_pfam_nspdr_g)$membership ==
    which.max(components(ccl_pfam_nspdr_g)$csize)
  ]
)
ccl_pfam_nspdr_g_v_msk <- (
  V(ccl_pfam_nspdr_g)$name %in% V(ccl_pfam_nspdr_g)$name
)
ccl_pfam_nspdr_p_g <- delete_edges(
  ccl_pfam_nspdr_g,
  E(ccl_pfam_nspdr_g)[E(ccl_pfam_nspdr_g)$weight < 0]
)
ccl_pfam_nspdr_p_g <- induced_subgraph(
  ccl_pfam_nspdr_p_g,
  V(ccl_pfam_nspdr_p_g)[
    components(ccl_pfam_nspdr_p_g)$membership ==
    which.max(components(ccl_pfam_nspdr_p_g)$csize)
  ]
)
ccl_pfam_nspdr_p_g_v_msk <- (
  V(ccl_pfam_nspdr_g)$name %in% V(ccl_pfam_nspdr_p_g)$name
)
ccl_pfam_nspdr_n_g <- delete_edges(
  ccl_pfam_nspdr_g,
  E(ccl_pfam_nspdr_g)[E(ccl_pfam_nspdr_g)$weight > 0]
)
ccl_pfam_nspdr_n_g <- induced_subgraph(
  ccl_pfam_nspdr_n_g,
  V(ccl_pfam_nspdr_n_g)[
    components(ccl_pfam_nspdr_n_g)$membership ==
    which.max(components(ccl_pfam_nspdr_n_g)$csize)
  ]
)
ccl_pfam_nspdr_n_g_v_msk <- (
  V(ccl_pfam_nspdr_g)$name %in% V(ccl_pfam_nspdr_n_g)$name
)

Visualize Networks

Network Statistics

Degree distribution, shortest paths distance distribution, transitivity:

par(mfrow = c(4, 2), mar = c(5.1, 4.1, 4.1, 2.1))
# degree distribution
k_r <- degree(ccl_pfam_rspdr_g)
hist(k_r,
     breaks = 100,
     xlab = "k",
     ylab = "Frequency",
     main = "R Degree Frequency")
k_n <- degree(ccl_pfam_nspdr_g)
hist(k_n,
     breaks = 100,
     xlab = "k",
     ylab = NA,
     main = "NR Degree Frequency")
dd_r <- degree_distribution(ccl_pfam_rspdr_g)
dd_r_nzero_pos <- which(dd_r != 0)
pk_r <- dd_r[dd_r_nzero_pos]
dx_r <- 1:max(k_r)
dx_r <- dx_r[dd_r_nzero_pos]
plot(
  pk_r ~ log(dx_r),
  log = "xy",
  xlab = "log k",
  ylab = "log Pk",
  main = "R Log-Log Degree Distribution"
)
dd_n <- degree_distribution(ccl_pfam_nspdr_g)
dd_n_nzero_pos <- which(dd_n != 0)
pk_n <- dd_n[dd_n_nzero_pos]
dx_n <- 1:max(k_n)
dx_n <- dx_n[dd_n_nzero_pos]
plot(
  pk_n ~ log(dx_n),
  log = "xy",
  xlab = "log k",
  ylab = NA,
  main = "NR Log-Log Degree Distribution"
)
# distance distribution of shortest paths
nv_r <- vcount(ccl_pfam_rspdr_g)
bfs_r_vec <- matrix(nrow = nv_r, ncol = nv_r)
for (i in 1:nv_r) {
  bfs_r_vec[i, ] <- bfs(
    ccl_pfam_rspdr_g,
    root = i,
    dist = TRUE,
    unreachable = FALSE
  )$dist
}
distd_r <- table(bfs_r_vec) / sum(bfs_r_vec, na.rm = TRUE)
distd_r <- tail(distd_r, length(distd_r) - 1)
plot(
  names(distd_r),
  distd_r,
  xlab = "Distance",
  ylab = expression(P[Distance]),
  main = "R Distance Distribution of Shortest Paths"
)
nv_n <- vcount(ccl_pfam_nspdr_g)
bfs_n_vec <- matrix(nrow = nv_n, ncol = nv_n)
for (i in 1:nv_n) {
  bfs_n_vec[i, ] <- bfs(
    ccl_pfam_nspdr_g,
    root = i,
    dist = TRUE,
    unreachable = FALSE
  )$dist
}
distd_n <- table(bfs_n_vec) / sum(bfs_n_vec, na.rm = TRUE)
distd_n <- tail(distd_n, length(distd_n) - 1)
plot(
  names(distd_n),
  distd_n,
  xlab = "Distance",
  ylab = NA,
  main = "NR Distance Distribution of Shortest Paths"
)
# clustering
cl_r <- transitivity(ccl_pfam_rspdr_g, type = "local")
cl_r_df <- data.frame(clust = cl_r, degree = k_r) %>%
  group_by(degree) %>%
  summarize(mclust = mean(clust, na.rm = TRUE))
plot(
  mclust ~ degree,
  data = cl_r_df,
  xlab = "k",
  ylab = "C(k)",
  main = "R Clustering Coefficent"
)
cl_n <- transitivity(ccl_pfam_nspdr_g, type = "local")
cl_n_df <- data.frame(clust = cl_n, degree = k_n) %>%
  group_by(degree) %>%
  summarize(mclust = mean(clust, na.rm = TRUE))
plot(
  mclust ~ degree,
  data = cl_n_df,
  xlab = "k",
  ylab = NA,
  main = "NR Clustering Coefficent"
)

Additional statistics:

Note: could not calculate betweenness and closeness on networks since it appears networks are too large and it crashes the R session

##                Clustering.coef. Power.law.coef. Mean.shortest.path
## Responders            0.8931404        1.333328           2.653384
## Non-Responders        0.8205228        2.000000           2.735562
##                  Density
## Responders     0.1458031
## Non-Responders 0.1200417

Community Detection in Responder/Non-Responder Networks Using InfoMap and SpinGlass

num_top_clusts <- 3

# Responder +Network InfoMap
ccl_pfam_rspdr_p_g_imap <- cluster_infomap(ccl_pfam_rspdr_p_g)
ccl_pfam_rspdr_p_g_imap_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_rspdr_p_g_imap), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_rspdr_p_g_imap_top_v_msk <- (
  ccl_pfam_rspdr_p_g_imap$membership %in% ccl_pfam_rspdr_p_g_imap_top_ids
)
ccl_pfam_rspdr_p_g_imap_top_g <- induced_subgraph(
  ccl_pfam_rspdr_p_g,
  V(ccl_pfam_rspdr_p_g)[ccl_pfam_rspdr_p_g_imap_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_rspdr_p_g_imap_top <- ccl_pfam_rspdr_p_g_imap
ccl_pfam_rspdr_p_g_imap_top$names <-
  ccl_pfam_rspdr_p_g_imap_top$names[ccl_pfam_rspdr_p_g_imap_top_v_msk]
ccl_pfam_rspdr_p_g_imap_top$membership <-
  ccl_pfam_rspdr_p_g_imap_top$membership[ccl_pfam_rspdr_p_g_imap_top_v_msk]
ccl_pfam_rspdr_p_g_imap_top$vcount <- length(ccl_pfam_rspdr_p_g_imap_top$names)

# Responder +Network SpinGlass
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_rspdr_p_g_spin_file <- "data/ccl_pfam_rspdr_p_g_spin.Rda"
if (!file.exists(ccl_pfam_rspdr_p_g_spin_file)) {
  ccl_pfam_rspdr_p_g_spin <- cluster_spinglass(ccl_pfam_rspdr_p_g)
  save(ccl_pfam_rspdr_p_g_spin, file = ccl_pfam_rspdr_p_g_spin_file)
} else {
  load(ccl_pfam_rspdr_p_g_spin_file)
}
ccl_pfam_rspdr_p_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_rspdr_p_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_rspdr_p_g_spin_top_v_msk <- (
  ccl_pfam_rspdr_p_g_spin$membership %in% ccl_pfam_rspdr_p_g_spin_top_ids
)
ccl_pfam_rspdr_p_g_spin_top_g <- induced_subgraph(
  ccl_pfam_rspdr_p_g,
  V(ccl_pfam_rspdr_p_g)[ccl_pfam_rspdr_p_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_rspdr_p_g_spin_top <- ccl_pfam_rspdr_p_g_spin
ccl_pfam_rspdr_p_g_spin_top$names <-
  ccl_pfam_rspdr_p_g_spin_top$names[ccl_pfam_rspdr_p_g_spin_top_v_msk]
ccl_pfam_rspdr_p_g_spin_top$membership <-
  ccl_pfam_rspdr_p_g_spin_top$membership[ccl_pfam_rspdr_p_g_spin_top_v_msk]
ccl_pfam_rspdr_p_g_spin_top$vcount <- length(ccl_pfam_rspdr_p_g_spin_top$names)

# Responder +/-Network SpinGlass
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_rspdr_g_spin_file <- "data/ccl_pfam_rspdr_g_spin.Rda"
if (!file.exists(ccl_pfam_rspdr_g_spin_file)) {
  ccl_pfam_rspdr_g_spin <- cluster_spinglass(ccl_pfam_rspdr_g,
                                             implementation = "neg")
  save(ccl_pfam_rspdr_g_spin, file = ccl_pfam_rspdr_g_spin_file)
} else {
  load(ccl_pfam_rspdr_g_spin_file)
}
ccl_pfam_rspdr_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_rspdr_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_rspdr_g_spin_top_v_msk <- (
  ccl_pfam_rspdr_g_spin$membership %in% ccl_pfam_rspdr_g_spin_top_ids
)
ccl_pfam_rspdr_g_spin_top_g <- induced_subgraph(
  ccl_pfam_rspdr_g,
  V(ccl_pfam_rspdr_g)[ccl_pfam_rspdr_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_rspdr_g_spin_top <- ccl_pfam_rspdr_g_spin
ccl_pfam_rspdr_g_spin_top$names <-
  ccl_pfam_rspdr_g_spin_top$names[ccl_pfam_rspdr_g_spin_top_v_msk]
ccl_pfam_rspdr_g_spin_top$membership <-
  ccl_pfam_rspdr_g_spin_top$membership[ccl_pfam_rspdr_g_spin_top_v_msk]
ccl_pfam_rspdr_g_spin_top$vcount <- length(ccl_pfam_rspdr_g_spin_top$names)

# Non-Responder +Network InfoMap
ccl_pfam_nspdr_p_g_imap <- cluster_infomap(ccl_pfam_nspdr_p_g)
ccl_pfam_nspdr_p_g_imap_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_nspdr_p_g_imap), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_nspdr_p_g_imap_top_v_msk <- (
  ccl_pfam_nspdr_p_g_imap$membership %in% ccl_pfam_nspdr_p_g_imap_top_ids
)
ccl_pfam_nspdr_p_g_imap_top_g <- induced_subgraph(
  ccl_pfam_nspdr_p_g,
  V(ccl_pfam_nspdr_p_g)[ccl_pfam_nspdr_p_g_imap_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_nspdr_p_g_imap_top <- ccl_pfam_nspdr_p_g_imap
ccl_pfam_nspdr_p_g_imap_top$names <-
  ccl_pfam_nspdr_p_g_imap_top$names[ccl_pfam_nspdr_p_g_imap_top_v_msk]
ccl_pfam_nspdr_p_g_imap_top$membership <-
  ccl_pfam_nspdr_p_g_imap_top$membership[ccl_pfam_nspdr_p_g_imap_top_v_msk]
ccl_pfam_nspdr_p_g_imap_top$vcount <- length(ccl_pfam_nspdr_p_g_imap_top$names)

# Non-Responder +Network SpinGlass
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_nspdr_p_g_spin_file <- "data/ccl_pfam_nspdr_p_g_spin.Rda"
if (!file.exists(ccl_pfam_nspdr_p_g_spin_file)) {
  ccl_pfam_nspdr_p_g_spin <- cluster_spinglass(ccl_pfam_nspdr_p_g)
  save(ccl_pfam_nspdr_p_g_spin, file = ccl_pfam_nspdr_p_g_spin_file)
} else {
  load(ccl_pfam_nspdr_p_g_spin_file)
}
ccl_pfam_nspdr_p_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_nspdr_p_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_nspdr_p_g_spin_top_v_msk <- (
  ccl_pfam_nspdr_p_g_spin$membership %in% ccl_pfam_nspdr_p_g_spin_top_ids
)
ccl_pfam_nspdr_p_g_spin_top_g <- induced_subgraph(
  ccl_pfam_nspdr_p_g,
  V(ccl_pfam_nspdr_p_g)[ccl_pfam_nspdr_p_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_nspdr_p_g_spin_top <- ccl_pfam_nspdr_p_g_spin
ccl_pfam_nspdr_p_g_spin_top$names <-
  ccl_pfam_nspdr_p_g_spin_top$names[ccl_pfam_nspdr_p_g_spin_top_v_msk]
ccl_pfam_nspdr_p_g_spin_top$membership <-
  ccl_pfam_nspdr_p_g_spin_top$membership[ccl_pfam_nspdr_p_g_spin_top_v_msk]
ccl_pfam_nspdr_p_g_spin_top$vcount <- length(ccl_pfam_nspdr_p_g_spin_top$names)

# Non-Responder +/-Network SpinGlass
# SpinGlass takes a long time to run (load cached if available)
ccl_pfam_nspdr_g_spin_file <- "data/ccl_pfam_nspdr_g_spin.Rda"
if (!file.exists(ccl_pfam_nspdr_g_spin_file)) {
  ccl_pfam_nspdr_g_spin <- cluster_spinglass(ccl_pfam_nspdr_g,
                                             implementation = "neg")
  save(ccl_pfam_nspdr_g_spin, file = ccl_pfam_nspdr_g_spin_file)
} else {
  load(ccl_pfam_nspdr_g_spin_file)
}
ccl_pfam_nspdr_g_spin_top_ids <- sort(as.numeric(names(
  sort(sizes(ccl_pfam_nspdr_g_spin), decreasing = TRUE)[1:num_top_clusts]
)))
ccl_pfam_nspdr_g_spin_top_v_msk <- (
  ccl_pfam_nspdr_g_spin$membership %in% ccl_pfam_nspdr_g_spin_top_ids
)
ccl_pfam_nspdr_g_spin_top_g <- induced_subgraph(
  ccl_pfam_nspdr_g,
  V(ccl_pfam_nspdr_g)[ccl_pfam_nspdr_g_spin_top_v_msk]
)
# iGraph has no functionality to subset community objects so hack it
ccl_pfam_nspdr_g_spin_top <- ccl_pfam_nspdr_g_spin
ccl_pfam_nspdr_g_spin_top$names <-
  ccl_pfam_nspdr_g_spin_top$names[ccl_pfam_nspdr_g_spin_top_v_msk]
ccl_pfam_nspdr_g_spin_top$membership <-
  ccl_pfam_nspdr_g_spin_top$membership[ccl_pfam_nspdr_g_spin_top_v_msk]
ccl_pfam_nspdr_g_spin_top$vcount <- length(ccl_pfam_nspdr_g_spin_top$names)

par(mfrow = c(6, 2), mar = c(1, 1, 3, 1))
plot(
  ccl_pfam_rspdr_p_g_imap,
  ccl_pfam_rspdr_p_g,
  layout = ccl_pfam_rspdr_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Responder CCLasso +Network InfoMap Clusters"
)
plot(
  ccl_pfam_rspdr_p_g_imap_top,
  ccl_pfam_rspdr_p_g_imap_top_g,
  col = membership(ccl_pfam_rspdr_p_g_imap)[ccl_pfam_rspdr_p_g_imap_top_v_msk],
  layout = ccl_pfam_rspdr_p_g_coord[ccl_pfam_rspdr_p_g_imap_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_rspdr_p_g_imap
  )), alpha = 1)[ccl_pfam_rspdr_p_g_imap_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_rspdr_p_g_imap
  )), alpha = 0.3)[ccl_pfam_rspdr_p_g_imap_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_nspdr_p_g_imap,
  ccl_pfam_nspdr_p_g,
  layout = ccl_pfam_nspdr_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Non-Responder CCLasso +Network InfoMap Clusters"
)
plot(
  ccl_pfam_nspdr_p_g_imap_top,
  ccl_pfam_nspdr_p_g_imap_top_g,
  col = membership(ccl_pfam_nspdr_p_g_imap)[ccl_pfam_nspdr_p_g_imap_top_v_msk],
  layout = ccl_pfam_nspdr_p_g_coord[ccl_pfam_nspdr_p_g_imap_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_nspdr_p_g_imap
  )), alpha = 1)[ccl_pfam_nspdr_p_g_imap_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_nspdr_p_g_imap
  )), alpha = 0.3)[ccl_pfam_nspdr_p_g_imap_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_rspdr_p_g_spin,
  ccl_pfam_rspdr_p_g,
  layout = ccl_pfam_rspdr_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Responder CCLasso +Network SpinGlass Clusters"
)
plot(
  ccl_pfam_rspdr_p_g_spin_top,
  ccl_pfam_rspdr_p_g_spin_top_g,
  col = membership(ccl_pfam_rspdr_p_g_spin)[ccl_pfam_rspdr_p_g_spin_top_v_msk],
  layout = ccl_pfam_rspdr_p_g_coord[ccl_pfam_rspdr_p_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_rspdr_p_g_spin
  )),
  alpha = 1)[ccl_pfam_rspdr_p_g_spin_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_rspdr_p_g_spin
  )),
  alpha = 0.3)[ccl_pfam_rspdr_p_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_nspdr_p_g_spin,
  ccl_pfam_nspdr_p_g,
  layout = ccl_pfam_nspdr_p_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Non-Responder CCLasso +Network SpinGlass Clusters"
)
plot(
  ccl_pfam_nspdr_p_g_spin_top,
  ccl_pfam_nspdr_p_g_spin_top_g,
  col = membership(ccl_pfam_nspdr_p_g_spin)[ccl_pfam_nspdr_p_g_spin_top_v_msk],
  layout = ccl_pfam_nspdr_p_g_coord[ccl_pfam_nspdr_p_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_nspdr_p_g_spin
  )),
  alpha = 1)[ccl_pfam_nspdr_p_g_spin_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_nspdr_p_g_spin
  )),
  alpha = 0.3)[ccl_pfam_nspdr_p_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_rspdr_g_spin,
  ccl_pfam_rspdr_g,
  layout = ccl_pfam_rspdr_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Responder CCLasso +/-Network SpinGlass Clusters"
)
plot(
  ccl_pfam_rspdr_g_spin_top,
  ccl_pfam_rspdr_g_spin_top_g,
  col = membership(ccl_pfam_rspdr_g_spin)[ccl_pfam_rspdr_g_spin_top_v_msk],
  layout = ccl_pfam_rspdr_g_coord[ccl_pfam_rspdr_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_rspdr_g_spin
  )),
  alpha = 1)[ccl_pfam_rspdr_g_spin_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_rspdr_g_spin
  )),
  alpha = 0.3)[ccl_pfam_rspdr_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)
plot(
  ccl_pfam_nspdr_g_spin,
  ccl_pfam_nspdr_g,
  layout = ccl_pfam_nspdr_g_coord,
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = "PFAM Non-Responder CCLasso +/-Network SpinGlass Clusters"
)
plot(
  ccl_pfam_nspdr_g_spin_top,
  ccl_pfam_nspdr_g_spin_top_g,
  col = membership(ccl_pfam_nspdr_g_spin)[ccl_pfam_nspdr_g_spin_top_v_msk],
  layout = ccl_pfam_nspdr_g_coord[ccl_pfam_nspdr_g_spin_top_v_msk, , drop = F],
  mark.border = rainbow(length(communities(
    ccl_pfam_nspdr_g_spin
  )),
  alpha = 1)[ccl_pfam_nspdr_g_spin_top_ids],
  mark.col = rainbow(length(communities(
    ccl_pfam_nspdr_g_spin
  )),
  alpha = 0.3)[ccl_pfam_nspdr_g_spin_top_ids],
  vertex.size = 4,
  vertex.label.cex = 0.25,
  main = paste("Top", num_top_clusts, "Clusters")
)

Save cluster results:

# Responder
ccl_pfam_rspdr_p_g_imap_data <- data.frame()
for (i in 1:max(ccl_pfam_rspdr_p_g_imap$membership)) {
  ccl_pfam_rspdr_p_g_imap_data <- rbind(
    ccl_pfam_rspdr_p_g_imap_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_rspdr_p_g)$name[
        ccl_pfam_rspdr_p_g_imap$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_rspdr_p_g_imap_data <-
  merge.easy(ccl_pfam_rspdr_p_g_imap_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_rspdr_p_g_imap_data,
          file = "results/PfamRpr_CCLassoPosNet_InfoMapClusters.csv",
          row.names = FALSE)
ccl_pfam_rspdr_p_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_rspdr_p_g_spin$membership)) {
  ccl_pfam_rspdr_p_g_spin_data <- rbind(
    ccl_pfam_rspdr_p_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_rspdr_p_g)$name[
        ccl_pfam_rspdr_p_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_rspdr_p_g_spin_data <-
  merge.easy(ccl_pfam_rspdr_p_g_spin_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_rspdr_p_g_spin_data,
          file = "results/PfamRpr_CCLassoPosNet_SpinGlassClusters.csv",
          row.names = FALSE)
ccl_pfam_rspdr_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_rspdr_g_spin$membership)) {
  ccl_pfam_rspdr_g_spin_data <- rbind(
    ccl_pfam_rspdr_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_rspdr_g)$name[
        ccl_pfam_rspdr_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_rspdr_g_spin_data <-
  merge.easy(ccl_pfam_rspdr_g_spin_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_rspdr_g_spin_data,
          file = "results/PfamRpr_CCLassoNet_SpinGlassClusters.csv")

# Non-Responder
ccl_pfam_nspdr_p_g_imap_data <- data.frame()
for (i in 1:max(ccl_pfam_nspdr_p_g_imap$membership)) {
  ccl_pfam_nspdr_p_g_imap_data <- rbind(
    ccl_pfam_nspdr_p_g_imap_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_nspdr_p_g)$name[
        ccl_pfam_nspdr_p_g_imap$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_nspdr_p_g_imap_data <-
  merge.easy(ccl_pfam_nspdr_p_g_imap_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_nspdr_p_g_imap_data,
          file = "results/PfamNpr_CCLassoPosNet_InfoMapClusters.csv",
          row.names = FALSE)
ccl_pfam_nspdr_p_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_nspdr_p_g_spin$membership)) {
  ccl_pfam_nspdr_p_g_spin_data <- rbind(
    ccl_pfam_nspdr_p_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_nspdr_p_g)$name[
        ccl_pfam_nspdr_p_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_nspdr_p_g_spin_data <-
  merge.easy(ccl_pfam_nspdr_p_g_spin_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_nspdr_p_g_spin_data,
          file = "results/PfamNpr_CCLassoPosNet_SpinGlassClusters.csv",
          row.names = FALSE)
ccl_pfam_nspdr_g_spin_data <- data.frame()
for (i in 1:max(ccl_pfam_nspdr_g_spin$membership)) {
  ccl_pfam_nspdr_g_spin_data <- rbind(
    ccl_pfam_nspdr_g_spin_data,
    data.frame(
      Cluster = i,
      Accession = V(ccl_pfam_nspdr_g)$name[
        ccl_pfam_nspdr_g_spin$membership == i
      ],
      stringsAsFactors = FALSE
    )
  )
}
ccl_pfam_nspdr_g_spin_data <-
  merge.easy(ccl_pfam_nspdr_g_spin_data, pfam_feat, key = "Accession")
write.csv(ccl_pfam_nspdr_g_spin_data,
          file = "results/PfamNpr_CCLassoNet_SpinGlassClusters.csv",
          row.names = FALSE)